In [1]:
# Allow us to load `open_cp` without installing
import sys, os.path
sys.path.insert(0, os.path.abspath(os.path.join("..", "..")))
In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib
import descartes
import os
import numpy as np
import descartes
import open_cp.sources.chicago as chicago
import open_cp.naive
import open_cp.geometry
import open_cp.plot
datadir = os.path.join("//media", "disk", "Data")
#datadir = os.path.join("..", "..", "..", "..", "..", "Data")
In [3]:
chicago.set_data_directory(datadir)
south_side = chicago.get_side("South")
points = chicago.load(os.path.join(datadir, "chicago_two.csv"), {"BURGLARY"}, type="all_other")
In [4]:
points.number_data_points, points.time_range
Out[4]:
In [5]:
fig, ax = plt.subplots(ncols=2, figsize=(18,10))
ax[0].scatter(*points.coords, marker="+", alpha=0.05, color="Black")
ax[0].set_aspect(1)
ax[0].set_title("All Burglary crime")
pred = open_cp.naive.CountingGridKernel(250)
pred.data = points
risk = pred.predict()
matrix = np.ma.masked_where(risk.intensity_matrix==0, risk.intensity_matrix)
mappable = ax[1].pcolor(*risk.mesh_data(), matrix, cmap="Blues", edgecolor="none", linewidth=1)
ax[1].set_title("Count in each 250m square cell")
cbar = fig.colorbar(mappable, orientation="vertical", ax=ax[1])
cbar.set_label("Total crime count in region")
In [6]:
cdict = {'red': [(0.0, 1.0, 1.0),
(1.0, 1.0, 1.0)],
'green': [(0.0, 1.0, 1.0),
(1.0, 0.0, 0.0)],
'blue': [(0.0, 0.2, 0.2),
(1.0, 0.2, 0.2)]}
yellow_to_red = matplotlib.colors.LinearSegmentedColormap("yellow_to_red", cdict)
In [7]:
points_south_side = open_cp.geometry.intersect_timed_points(points, south_side)
points_south_side.number_data_points, points_south_side.time_range
Out[7]:
In [8]:
fig, ax = plt.subplots(ncols=2, figsize=(18,10))
ax[0].scatter(*points_south_side.coords, marker="+", alpha=0.05, color="Black")
ax[0].set_aspect(1)
ax[0].set_title("All Burglary crime")
pred = open_cp.naive.CountingGridKernel(250)
pred.data = points_south_side
risk = pred.predict()
matrix = np.ma.masked_where(risk.intensity_matrix==0, risk.intensity_matrix)
mappable = ax[1].pcolor(*risk.mesh_data(), matrix, cmap=yellow_to_red, edgecolor="black", linewidth=0.5)
ax[1].set_title("Count in each 250m square cell")
cax = fig.add_axes([0.9, 0.2, 0.01, 0.7])
cbar = fig.colorbar(mappable, orientation="vertical", cax=cax)
cbar.set_label("Total crime count in region")
xmin, ymin, xmax, ymax = south_side.bounds
for a in ax:
a.add_patch(descartes.PolygonPatch(south_side, fc="none", ec="Black"))
a.set(xlim=[xmin-500, xmax+500], ylim=[ymin-500, ymax+500])
a.set_aspect(1)
For KDE applications, it is interesting to think about the covariance matrix of the data. More precisely, the common approach to KDE is to first apply a Whitening transformation to the data, fit a KDE with a radially symmetric kernel, and then transform back.
In [9]:
import scipy.linalg
In [10]:
S = np.cov(points.coords)
S = scipy.linalg.inv(S)
S = scipy.linalg.cholesky(S)
x = points.coords - np.mean(points.coords, axis=1)[:,None]
x = np.dot(S, x)
fig, ax = plt.subplots(figsize=(8,8))
ax.scatter(*x)
ax.set_aspect(1)
ax.set(title="'Whitened' points")
S
Out[10]:
In [11]:
S = np.cov(points_south_side.coords)
S = scipy.linalg.inv(S)
S = scipy.linalg.cholesky(S)
x = points_south_side.coords - np.mean(points_south_side.coords, axis=1)[:,None]
x = np.dot(S, x)
fig, ax = plt.subplots(figsize=(8,8))
ax.scatter(*x)
ax.set_aspect(1)
ax.set(title="'Whitened' points for South Side")
S
Out[11]:
In [12]:
grid = open_cp.data.Grid(xsize=250, ysize=250, xoffset=0, yoffset=0)
masked_grid = open_cp.geometry.mask_grid_by_intersection(south_side, grid)
grid = open_cp.data.Grid(xsize=250, ysize=250, xoffset=125, yoffset=125)
masked_grid_off = open_cp.geometry.mask_grid_by_intersection(south_side, grid)
In [13]:
fig, ax = plt.subplots(ncols=2, figsize=(16,8))
for a in ax:
a.add_patch(descartes.PolygonPatch(south_side, fc="none", ec="Black"))
a.add_patch(descartes.PolygonPatch(south_side, fc="Blue", ec="none", alpha=0.2))
xmin, ymin, xmax, ymax = south_side.bounds
a.set(xlim=[xmin-500,xmax+500], ylim=[ymin-500,ymax+500])
pc = open_cp.plot.patches_from_grid(masked_grid)
ax[0].add_collection(matplotlib.collections.PatchCollection(pc, facecolor="None", edgecolor="black"))
pc = open_cp.plot.patches_from_grid(masked_grid_off)
ax[1].add_collection(matplotlib.collections.PatchCollection(pc, facecolor="None", edgecolor="black"))
None
In [ ]: